* ==============================================================================
* Clean WVS collapse and prepare for merge with 301 file
* ==============================================================================

* Reset the session and raise the variable limit before loading any data.
clear
set more off
set maxvar 10000

* Start from the NUTS1 collapse and stack the remaining WVS collapse files
* underneath it, in the same order as before.
use "$dta_files/wvs_collapse_nuts1.dta"
foreach piece in nuts2 eu_country us_ca_country us_ca_region {
	append using "$dta_files/wvs_collapse_`piece'.dta"
}

* Build one region identifier: take the nuts1 code, fall back to the nuts2
* code, and finally to the country code for rows outside the US and Canada.
gen nuts = nuts1
replace nuts = nuts2 if nuts == ""
replace nuts = country if nuts == "" & !inlist(country, "US", "CA")

drop nuts1 nuts2

* United States: rows without a region code are the national aggregate;
* region codes 840001-840010 map, in order, to the labels listed below.
replace nuts = "Entire U.S." if region == . & country == "US"
local code = 840000
foreach label in "New England" "Middle Atlantic States" "South Atlantic" ///
		"East South Central" "West South Central" "East North Central" ///
		"West North Central" "Rocky Mountain state" "Northwest" "California" {
	local ++code
	replace nuts = "US: `label'" if region == `code'
}

* Canada: discard the region-less rows from sample 7, label the remaining
* region-less rows as the national aggregate, then map region codes
* 124001-124010, in order, to the provinces listed below.
drop if region == . & country == "CA" & sample == 7
replace nuts = "Canada" if region == . & country == "CA"
local code = 124000
foreach label in "Prince Edward Island" "Nova Scotia" "New Brunswick" ///
		"Quebec" "Ontario" "Manitoba" "Saskatchewan" "Alberta" ///
		"British Columbia" "Newfoundland and Labrador" {
	local ++code
	replace nuts = "CA: `label'" if region == `code'
}

order country

* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Deal with splits.
* Duplicate the row of a region that was split, so that one copy stays tied to
* the first resulting region (FI1B) and the other copy is tied to the second (FI1C).
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Duplicate every FI1B row in a single pass and relabel the copy as FI1C.
* The generate() option of expand flags appended copies with 1 and original
* rows with 0, so the copy is identified directly rather than by its position
* after an unstable sort. This also replaces the former loop that issued one
* expand command per observation.
tempvar fi_copy
expand 2 if nuts == "FI1B", generate(`fi_copy')
replace nuts = "FI1C" if `fi_copy' == 1
drop `fi_copy'

* Leave the data ordered by nuts, as the previous by-group step did.
sort nuts

********************************************************************************

* Drop every variable in the range s016-freq_x055_10 that has no non-missing
* observations, and report its name.
* The name is displayed with noisily because the enclosing quietly block
* would otherwise suppress it. meanonly skips statistics that are not needed
* here; r(N) is unchanged.
quietly foreach var of varlist s016-freq_x055_10 {
	summarize `var', meanonly
	if r(N) == 0 {
		drop `var'
		noisily display "`var'"
	}
}

* For each variable in s016-x055: count its non-missing observations within
* each country, number the distinct count values with group(), and drop the
* variable when fewer than three distinct count values exist.
* NOTE(review): presumably a proxy for the variable being observed in too few
* countries - confirm that this is the intended criterion.
*
* The scratch variables are temporary and removed in every iteration. The
* former scratch names xxx and xxx1 survived the loop and matched the x*
* pattern of later keep lists, so they ended up in the saved output files.
capture drop xxx
capture drop xxx1
tempvar n_by_country n_distinct
quietly foreach var of varlist s016-x055 {
	egen `n_by_country' = count(`var'), by(country)
	egen `n_distinct' = group(`n_by_country')
	summarize `n_distinct', meanonly
	if r(max) < 3 {
		drop `var'
		* noisily is required: the enclosing quietly suppresses plain display.
		noisily display "`var'"
	}
	drop `n_by_country' `n_distinct'
}

* Remove the frequency variables of the x, w, v and u groups.
drop freq_x* freq_w* freq_v* freq_u*

* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* Create duplicates to merge with U.S. states
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* Each US region row is copied once per member state. The first copy keeps
* the region label and the remaining copies are relabelled state by state,
* in the order listed here.
local region1  "US: New England"
local states1  CT ME MA NH RI VT
local region2  "US: Middle Atlantic States"
local states2  NJ NY PA
local region3  "US: South Atlantic"
local states3  DE DC FL GA MD NC SC VA WV
local region4  "US: East South Central"
local states4  AL KY MS TN
local region5  "US: West South Central"
local states5  LA AR OK TX
local region6  "US: East North Central"
local states6  IL IN MI OH WI
local region7  "US: West North Central"
local states7  IA KS MN MO NE ND SD
local region8  "US: Rocky Mountain state"
local states8  AZ CO ID MT NV NM UT WY
local region9  "US: Northwest"
local states9  AK HI OR WA
local region10 "US: California"
local states10 CA

* One copy per state plus the row that keeps the region label.
forvalues r = 1/10 {
	local copies : word count `states`r''
	local ++copies
	expand `copies' if nuts == "`region`r''"
}

* Number the copies within each label; the counter n stays in the saved file.
bys nuts: gen n = _n

* Copy 1 keeps the region label; copies 2, 3, ... take the state labels.
forvalues r = 1/10 {
	local k = 1
	foreach st of local states`r' {
		local ++k
		replace nuts = "US: `st'" if nuts == "`region`r''" & n == `k'
	}
}

save "$dta_files/WVS_collapse_only_cleaned", replace

* ==============================================================================
* Merge with list of countries in Sample
* ==============================================================================

use "$dta_files/step_0400_prepare_WVS_merge", clear

* Assign NUTS levels to the Swiss codes: CH is level 0, CH0 is level 1,
* CH01-CH07 are level 2 and CH010-CH070 are level 3.
replace nuts_level = 0 if nuts == "CH"
replace nuts_level = 1 if nuts == "CH0"
quietly replace nuts_level = 2 if regexm(nuts, "^CH0[1-7]$")
quietly replace nuts_level = 3 if regexm(nuts, "^CH0[1-6][0-9]$") | nuts == "CH070"


* Level-specific copies of the region code.
forvalues lvl = 1/3 {
	gen nuts`lvl' = nuts if nuts_level == `lvl'
}

* Where the country is blank, take it from the first two characters of the code.
quietly replace country = substr(nuts, 1, 2) if country == ""

* Flag for rows without WVS data; set after the merge for master-only rows.
gen not_in_WVS = 0

compress

sort nuts year
merge m:1 nuts country using "$dta_files/WVS_collapse_only_cleaned", gen(_merge_WVS)

replace not_in_WVS = 1 if _merge_WVS == 1

* List the regions that matched the WVS/EVS data, by NUTS level
bys nuts_level: tab nuts if _merge_WVS == 3
* List the regions that did not match the WVS/EVS data, by NUTS level
bys nuts_level: tab nuts if _merge_WVS != 3

* ==============================================================================
* Collapse over composite NUTS
* ==============================================================================

* Split the data into composite and non-composite regions, collapse the
* composite ones to a single row per composite code, and stack the two
* parts back together.
*
* tempfile stores each temporary path in a local macro with exactly the name
* given, so the macros are dereferenced by that name alone. The previous
* references appended .dta inside the macro quotes, which names a macro that
* tempfile never defined.
tempfile precollapse non_composite_nuts
save "`precollapse'"

* Part 1: rows that are not part of a composite region, kept unchanged.
drop if nuts_composite_flag == 1
save "`non_composite_nuts'"

* Part 2: rows that belong to a composite region, relabelled with the
* composite code wherever one is given.
use "`precollapse'", clear
keep if nuts_composite_flag == 1
replace nuts = composite_nuts if composite_nuts != ""

* The intermediate save of the composite rows was removed: that file was
* never read back.

local variables s016 a0* a1* b0* c0* d0* e0* e1* f0* f1* g0* u0* v0* w0* x0* y0*

* WVS variables: mean over the components of each composite code, weighted by
* sample size. Variables not listed here are not carried through the collapse.
collapse (mean) `variables' not_in_WVS [aw=sample_size], by(nuts) fast

append using "`non_composite_nuts'"

********************************************************************************

* US and Canadian rows default to level 2.
replace nuts_level = 2 if inlist(country, "US", "CA")

* The ten US region rows are level 1.
foreach us_region in "US: New England" "US: Middle Atlantic States" ///
		"US: South Atlantic" "US: East South Central" ///
		"US: West South Central" "US: East North Central" ///
		"US: West North Central" "US: Rocky Mountain state" ///
		"US: Northwest" "US: California" {
	replace nuts_level = 1 if nuts == "`us_region'"
}

* Country-level rows are level 0; the FI1C copy is level 2.
replace nuts_level = 0 if nuts == country | inlist(nuts, "Canada", "Entire U.S.")
replace nuts_level = 2 if nuts == "FI1C"

* Merge in the NUTS level where it is still missing
merge m:1 nuts using "$dta_files/nuts_level.dta", update
drop _merge

* ==============================================================================
* Fill in missing WVS data from nearby regions for NUTS 
* ==============================================================================

* For each target region, overwrite every listed variable with the mean of
* that variable over three donor regions, then mark the target as covered.
local donors_DEB2 DEA2 DEB1 DEC0
local donors_UKM5 UKM2 UKM3 UKM6

foreach target in DEB2 UKM5 {
	local d1 : word 1 of `donors_`target''
	local d2 : word 2 of `donors_`target''
	local d3 : word 3 of `donors_`target''

	quietly foreach var of varlist s016 a* b* c0* d* e* f* g* u* v* w* x* ///
				     freq* sample_size nuts_level s020 {
		tabstat `var' if inlist(nuts, "`d1'", "`d2'", "`d3'"), stat(mean) save
		matrix mean = r(StatTotal)
		local donor_mean = mean[1,1]
		replace `var' = `donor_mean' if nuts == "`target'"
	}
	replace not_in_WVS = 0 if nuts == "`target'"
}

********************************************************************************

drop composite_count nuts_component composite_total

* Fill in a blank country from the first two characters of the region code.
* The condition tests country, matching the same fill done before the WVS
* merge. It used to test nuts, and a blank nuts makes substr return an empty
* string, so the statement could never change anything.
replace country = substr(nuts, 1, 2) if country == ""

* Keep a full copy so that the second output file can start from it.
* tempfile stores the path in a local macro with exactly this name, so it is
* dereferenced by name alone.
tempfile full_wvs
save "`full_wvs'"

* Variables written to both output files.
local keepvars nuts a* b* c* d* e* f* g* u* v* w* x* s016 freq* sample_size nuts_level s020

* Output 1: all regions, including those without WVS data.
bys nuts_level: tab nuts if not_in_WVS == 1
keep `keepvars'

saveold "$dta_files/step0403_wvs_collapsed_full.dta", replace

* Output 2: only the regions that have WVS data.
use "`full_wvs'", clear

drop if not_in_WVS == 1
keep `keepvars'

saveold "$dta_files/step0403_wvs_collapsed_not_in_WVS_dropped.dta", replace
